/*

Instructions:
(great guide if you have any issues getting python to work: 
https://blog.uvm.edu/tbplante/2020/07/22/getting-python-to-work-with-stata-in-windows/)

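1. Make sure Python is installed.
	Download link if it is not: https://www.python.org/downloads

2. Install the required packages:
	With a command prompt open, enter these lines one at a time:
		py -m pip install --upgrade pip
		py -m pip install pandas
		py -m pip install openpyxl
		py -m pip install requests
		py -m pip install selenium
		*Note: you may have to type python or python3 instead of py depending on your Python installation.
		*Note: the NATO section downloads its files through Google Chrome using selenium, so Chrome must be
		installed and a matching chromedriver executable must be in Stata's working directory (./chromedriver).
		
3. Run the do file below.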

*/



**********Implementing File Names****************
python:
import os

# Create the two output subfolders (relative to the current working directory)
# unless they already exist.
for subfolder in ('raw', 'dta'):
    os.makedirs(subfolder, exist_ok=True)


# Output file names. They are relative paths, so the part before the slash is
# the subfolder created above; prepend a folder path to save them elsewhere.
NATO_file = 'raw/NATO_data_final.csv'
eda_file = 'raw/eda_data_final.csv'
combined_filename = "dta/nato_eda_compiled_data.dta"

# Folder Chrome downloads the NATO workbooks into; edit this to your own folder.
path = r"C:\Users\jordan.becker\My Drive\1_Research\disagg_milex_datafeature\raw"

end

****************NATO Data*************
python:

# Third-party packages required by this section: pandas, openpyxl, selenium.
# (requests is imported here and used by the EDA section further down.)
import pandas as pd
import math
import requests
from selenium import webdriver
import time

# NATO_file is deliberately not reassigned here: names defined in the
# file-name block at the top of this do file are still in scope (as `path`
# is below), so the NATO csv is written to the location configured there,
# consistent with the EDA csv.

# Configure Chrome so that downloaded workbooks land in `path`.
options = webdriver.ChromeOptions()
prefs = {"download.default_directory": path}
options.add_experimental_option("prefs", prefs)


# NATO defence-expenditure workbooks to retrieve, most recent release first.
list_links = [
    'https://www.nato.int/docu/pr/2022/pr-2022-105-en.xlsx',
    'https://www.nato.int/SGReport/2021/Defence%20Expenditure%202021_EN.xlsx',
    'https://www.nato.int/docu/pr/2021/pr-2021-030-en.xlsx',
    'https://www.nato.int/docu/pr/2019/pr-2019-123-en.xlsx',
    'https://www.nato.int/docu/pr/2019/PR-2019-034-eng.xlsx',
    'https://www.nato.int/docu/pr/2018/pr-cp2018-16.xlsx',
    'http://www.nato.int/docu/pr/2017/pr2017-045.xlsx',
]

# One cleaned dataframe per workbook is appended here, in the same order.
cleaned_files = []

import os

# Longest time (in seconds) to wait for Chrome to finish saving one workbook.
DOWNLOAD_TIMEOUT_SECONDS = 60


def _download_with_chrome(url, target_file):
    """Open url in Chrome and wait until target_file exists (or the timeout passes).

    Chrome saves into the folder configured in `options`; the browser is
    always closed afterwards so repeated downloads do not leave windows open.
    """
    # Imported here because only this helper needs it. The driver path is
    # passed through a Service object; passing it as the first positional
    # argument is rejected by current Selenium 4 releases.
    from selenium.webdriver.chrome.service import Service

    driver = webdriver.Chrome(service=Service('./chromedriver'), options=options)
    try:
        driver.get(url)
        # NOTE(review): assumes the final file name only appears once the
        # download is complete (Chrome appears to use a temporary name while
        # downloading) -- confirm on the target machine.
        deadline = time.time() + DOWNLOAD_TIMEOUT_SECONDS
        while not os.path.isfile(target_file) and time.time() < deadline:
            time.sleep(1)
    finally:
        driver.quit()


def _clean_country(raw_name):
    """Return a country label without trailing notes, asterisks or padding.

    Keeps only the text before the first run of three spaces, removes every
    '*' and strips surrounding whitespace.
    """
    return str(raw_name).split("   ")[0].replace('*', '').strip()


def _table_to_long(sheet, first_row, last_row, first_col, header, value_name):
    """Reshape one wide country-by-year table into long format.

    Rows first_row..last_row-1 and columns from first_col onward are taken
    from `sheet`, labelled with `header` (whose first entry is 'country'),
    and melted into the columns 'country', 'year' and `value_name`. Rows
    without a country or a year are dropped.
    """
    table = sheet.iloc[list(range(first_row, last_row)), first_col:]
    table = table.set_axis(header, axis=1)
    # With no value_vars given, every column except 'country' is melted.
    long_table = pd.melt(table, id_vars='country', var_name='year', value_name=value_name)
    long_table = long_table.dropna(how='any', subset=['country', 'year'])
    long_table = long_table.loc[:, ['country', 'year', value_name]]
    long_table['country'] = [_clean_country(label) for label in long_table['country']]
    return long_table


#Iterating through each year
for url in list_links:
    # The local file name is the last URL segment with "%20" turned into a space.
    filename = url.split('/')[-1].replace("%20", " ")
    local_file = path.replace("\\", '/') + '/' + filename

    # Download only when the workbook is not already in the download folder.
    if not os.path.isfile(local_file):
        _download_with_chrome(url, local_file)
    xl_file = pd.ExcelFile(local_file)

    #removing useless sheet
    # (built as a new list: removing items from a list while iterating over
    # it skips the element that follows each removal)
    sheet_names = [sheet for sheet in xl_file.sheet_names if 'CHART' not in sheet]

    #Cleaning and Storing data for each sheet
    bigDF = pd.DataFrame()
    for x in sheet_names:
        add = 0 #this is the start column of the table, found through the location of the name
        df = pd.read_excel(xl_file,sheet_name=x)

        #getting table start and end points
        # Every id uses 0 as "not found yet", so the scan below relies on the
        # markers never being located in row 0.
        title = ''
        id1 = 0 #Year index
        id2 = 0 #Start of first table index
        id3 = 0 # End of First table index
        id4 = 0 #Start of Second Table index
        id5 = 0 #End of second table index
        for index, row in df.iterrows():
            # A cell containing "table" in column 0 or 1 supplies the title
            # (the text after ': '); when it is in column 1 the table is
            # shifted one column to the right.
            if 'table' in str(df.iloc[index,0]).lower():
                title = str(df.iloc[index,0]).split(': ')
                title = title[1].replace(' ', '_')
            if 'table' in str(df.iloc[index,1]).lower():
                title = str(df.iloc[index,1]).split(': ')
                title = title[1].replace(' ','_')
                add = add + 1
            # First row whose cell in column add+2 contains a 0, 1 or 2 digit.
            if id1 == 0 and ('0' in str(df.iloc[index,add + 2]) or '1' in str(df.iloc[index,add + 2]) or '2' in str(df.iloc[index,add + 2])):
                id1 = index
            # First later row with text in the country column (add+1).
            if id1 != 0 and id2 == 0 and (isinstance(df.iloc[index,add + 1], str) or (not math.isnan(df.iloc[index,add + 1]) and isinstance(df.iloc[index,add + 1], str))):
                id2 = index
            # First later row whose country cell is NaN.
            if id1 != 0 and id2 != 0 and id3 == 0 and not isinstance(df.iloc[index,add + 1],str) and (math.isnan(df.iloc[index,add + 1])):
                id3 = index
            if index+1 < len(df) and id1 != 0 and id2 != 0 and id3 != 0 and id4 == 0  and isinstance(df.iloc[index,add], str) \
                    and (isinstance(df.iloc[index + 1,add + 1], str) or not math.isnan(df.iloc[index + 1,add + 2])) and math.isnan(df.iloc[index,add + 2]):#((isinstance(df.iloc[index,add + 1], str) and 'Note' not in df.iloc[index,add + 1])or (not math.isnan(df.iloc[index,add + 2]) and isinstance(df.iloc[index,add], str))):
                id4 = index + 1
            if(index+2 < len(df) and id1 != 0 and id2 != 0 and id3 != 0 and id4 == 0 and isinstance(df.iloc[index,add], str) \
            and (isinstance(df.iloc[index + 2,add + 1], str) or not math.isnan(df.iloc[index + 2,add + 2])) and math.isnan(df.iloc[index,add + 2]) \
            and math.isnan(df.iloc[index + 1,add ]) and math.isnan(df.iloc[index + 1,add + 1])):
                id4 = index + 1
            if id1 != 0 and id2 != 0 and id3 != 0 and id4 != 0 and id4 < index and id5 == 0 and not isinstance(df.iloc[index,add + 1],str) and (math.isnan(df.iloc[index,add + 2])):
                id5 = index

        # Column labels shared by every table of the sheet: 'country'
        # followed by the cells of the year row.
        header = ['country'] + list(df.iloc[id1, add + 2:])

        if id4 > 0:
            #Process for when there is two tables in a sheet
            name = str(df.iloc[id2 - 1, :].iloc[add])
            if "NATO" in name or "nan" in name:
                name = str(df.iloc[id2 - 2, :].iloc[add])
            name = title + '_' + name.replace(' ', '_')
            tab1 = _table_to_long(df, id2, id3, add + 1, header, name)

            #merging to main
            # The first table of a workbook starts the frame; later tables
            # are joined onto it. Merge errors are no longer swallowed,
            # because the old fallback replaced everything gathered so far.
            if len(bigDF.columns) == 0:
                bigDF = tab1
            else:
                bigDF = pd.merge(bigDF, tab1, how = 'left', on = ['country','year'])

            #same for table 2
            name = str(df.loc[id4 - 1, :].iloc[add])
            name = title + '_' + name.replace(' ', '_')
            tab2 = _table_to_long(df, id4, id5, add + 1, header, name)

            #merging to main
            bigDF = pd.merge(bigDF, tab2, how='left', on=['country', 'year'])

        else:
            #Process for when there is one table in a sheet
            name = str(df.loc[id1 - 2, :].iloc[add])
            name = title + '_'+ name.replace(' ','_')
            tab1 = _table_to_long(df, id2, id3, add + 1, header, name)

            #merging to main
            if len(bigDF.columns) == 0:
                bigDF = tab1
            else:
                bigDF = pd.merge(bigDF, tab1, how = 'left', on = ['country','year'])

    # All sheets have been read; release the workbook file.
    xl_file.close()

    # cleaning year
    # Year labels may carry an 'e' character (e.g. '2022e'); it is removed
    # before converting to a number.
    bigDF['year'] = [float(str(item).replace('e', '')) for item in bigDF['year']]
    bigDF['year'] = bigDF['year'].astype(float)

    #fixing turkey for 2022 file
    bigDF.loc[bigDF.country == "Türkiye", 'country'] = "Turkey"

    #Fixing rogue columns
    if 'Military_personnel_Military_personnel_(thousands)' in list(bigDF.columns):
        bigDF = bigDF.rename({'Military_personnel_Military_personnel_(thousands)': 'Military_personnel_Thousands'}, axis=1)

    #This is just for 2016 data
    if 'Defence_expenditure_per_capita_and_military_personnel_Defence_expenditure_per_capita_(2010_US_dollars)' in bigDF.columns:
        bigDF = bigDF.drop('Defence_expenditure_per_capita_and_military_personnel_Defence_expenditure_per_capita_(2010_US_dollars)', axis = 1)
        bigDF = bigDF.rename({'Defence_expenditure_per_capita_and_military_personnel_Military_personnel_(thousands)'
            : 'GDP_per_capita_and_defence_expenditure_per_capita_Defence_expenditure_per_capita_(US_dollars)',
            'GDP_GDP_per_capita_(thousand_US_dollars)' : 'GDP_per_capita_and_defence_expenditure_per_capita_GDP_per_capita_(thousand_US_dollars)',
            'GDP_Real_GDP_(billion_US_dollars)' : 'Real_GDP_Billion_US_dollars_(2010_prices_and_exchange_rates)' ,
            'Distribution_of_defence_expenditure_by_main_category_Infrastructure_(a)':'Distribution_of_defence_expenditure_by_main_category_Infrastructure_(c)',
            'Distribution_of_defence_expenditure_by_main_category_Other_(b)' : 'Distribution_of_defence_expenditure_by_main_category_Other_(d)'
        }, axis=1)

    cleaned_files.append(bigDF)

# Stack the cleaned workbooks, newest release first: the first frame is taken
# whole, and each later frame only contributes the rows whose
# (country, year) pair is not in the combined frame yet.
comb_df = pd.DataFrame()
for position, frame in enumerate(cleaned_files):
    frame = frame.reset_index()
    if position == 0:
        comb_df = frame
        continue
    for _, record in frame.iterrows():
        same_country = comb_df['country'] == record['country']
        same_year = comb_df['year'] == record['year']
        if (same_country & same_year).any():
            continue
        comb_df = pd.concat([comb_df, record.to_frame().T], ignore_index = True)
# This second reset adds a 'level_0' column next to the 'index' column
# produced by the per-frame reset above.
comb_df = comb_df.reset_index()



#Cleaning Variable Names:
# Columns that are not wanted in the output. The real-change column name is
# tied to one release (a 2014-2021 variant of the same name has been used in
# place of the 2014-2020 one), so missing columns are skipped rather than
# raising a KeyError.
unwanted_columns = [
    'Defence_expenditure_real_change_2014-2020_Million_US_dollars_(2015_prices_and_exchange_rates)',
    'index',
    'level_0',
]
comb_df = comb_df.drop(unwanted_columns, axis = 1, errors = 'ignore')

# Shorter, release-independent column names.
comb_df = comb_df.rename({'Defence_expenditure_Current_prices'
            : 'Defence_expenditure_in_millions_national_currency',
            'Defence_expenditure_Constant_2015_prices_' : 'Defence_expenditure_millions_national_currency_2015_prices',
            'Defence_expenditure_Current_prices_and_exchange_rates' : 'Defence_expenditure_millions_USD' ,
            'Defence_expenditure_Constant_2015_prices_and_exchange_rates':'Defence_expenditure_millions_USD_2015_prices',
            'Defence_expenditure_as_a_share_of_GDP_and_annual_real_change_Share_of_real_GDP_(%)' : 'Defence_expenditure_Share_of_real_GDP_(%)',
            'Defence_expenditure_as_a_share_of_GDP_and_annual_real_change_Annual_real_change_(%)': 'Defence_expenditure_Annual_real_change_(%)',
            'GDP_per_capita_and_defence_expenditure_per_capita_GDP_per_capita_(thousand_US_dollars)' : 'GDP_per_capita_(thousand_US_dollars)',
            'GDP_per_capita_and_defence_expenditure_per_capita_Defence_expenditure_per_capita_(US_dollars)' : 'Defence_expenditure_per_capita_(US_dollars)',
            'Distribution_of_defence_expenditure_by_main_category_Equipment_(a)' : 'Equipment',
            'Distribution_of_defence_expenditure_by_main_category_Personnel_(b)' : 'Personnel',
            'Distribution_of_defence_expenditure_by_main_category_Infrastructure_(c)' : 'Infrastructure',
            'Distribution_of_defence_expenditure_by_main_category_Other_(d)' : 'Other',
            'Defence_expenditure_Constant_2010_prices_' :  'Defence_expenditure_millions_national_currency_2010_prices',
            'Defence_expenditure_Constant_2010_prices_and_exchange_rates' : 'Defence_expenditure_millions_USD_2010_prices'

        }, axis=1)

print(f"Final Value Columns:\n {comb_df.columns}")

# Order rows by year (newest first) and then by country, using a temporary
# negated-year column as the sort key, and renumber the rows from zero.
comb_df = (
    comb_df.assign(year1 = -comb_df['year'])
    .sort_values(by = ['year1','country'])
    .drop(['year1'], axis = 1)
    .reset_index(drop = True)
)

# Write the csv and keep the frame for the final combining section.
comb_df.to_csv(NATO_file)
nato_data = comb_df
end



*************EDA Data************
python:
# Third-party packages used by this section: pandas, openpyxl, requests
# (pandas and requests were imported in the NATO section above).
import warnings

# EDA workbooks to read, most recent release first.
links = [
    'https://eda.europa.eu/docs/default-source/brochures/defence-data-2021.xlsx',
    'https://eda.europa.eu/docs/default-source/brochures/data-by-pms-for-the-eda-website.xlsx',
    'https://eda.europa.eu/docs/default-source/documents/eda-collective-and-national-defence-data-2017-2019-(excel).xlsx',
    'https://eda.europa.eu/docs/default-source/documents/defence-data/eda-collective-and-national-defence-data-2005-2017e-(excel).xlsx',
]

# Output csv for the EDA data (set in the file-name block at the top).
superFile = eda_file

# One cleaned dataframe per workbook, plus a counter of workbooks handled.
cleaned_files = []
i = 0
# Download each EDA workbook, reshape every sheet into one row per year with
# one column per variable, harmonise the column names and store the result.
for link in links:
    print(f"analyzing file {link}")
    #importing file from link
    r = requests.get(link, timeout=60)
    # Stop here with the HTTP status when the download failed, rather than
    # later with an unreadable-workbook error.
    r.raise_for_status()
    with open('raw/temp.xlsx', 'wb') as temp:
        temp.write(r.content)
    # The workbook is opened only after the `with` block has closed (and
    # therefore flushed) the temporary file.
    xl_file = pd.ExcelFile('raw/temp.xlsx')
    sheet_names = xl_file.sheet_names

    print(f"Sheet Names: {sheet_names}")
    totalDF = pd.DataFrame()
    for x in sheet_names:
        df = pd.read_excel(xl_file,sheet_name=x)

        cols = list(df.columns)

        #getting rid of notes lines
        df = df.dropna(how='any')

        # turning data into long format to differentiate by year
        # (no value_vars: every column except the first one is melted)
        df = pd.melt(df, id_vars=cols[0], var_name='year', value_name='value')

        #turning data into wide format to turn each variable into a column
        with warnings.catch_warnings():
            #note, the line below triggers a futurewarning that appears to be no longer necessary, warning has been suppressed
            warnings.simplefilter("ignore")
            df = df.pivot(index = "year",columns=cols[0], values='value')
        # The header of the sheet's first column is used as the country
        # label for every row of that sheet.
        df['country'] = cols[0]
        df = df.reset_index(level = 'year')

        #removing weird duplicate year columns (bug-fix for 2021 data)
        for c in list(df.columns):
            if c == cols[0] or c == "Values":
                df = df.drop(c, axis = 1)

        #rearranging column order so country name is first
        cols3 = list(df.columns)
        cols3 = cols3[-1:] + cols3[:-1]
        df = df[cols3]

        #appending to final dataframe
        totalDF = pd.concat([totalDF,df])

    # All sheets have been read; release the temporary workbook.
    xl_file.close()

    print("cleaning year")
    #cleaning year
    # Year labels may carry an 'e' character (e.g. '2017e'); it is removed
    # before converting to a number.
    totalDF['year'] = [float(str(item).replace('e', '')) for item in totalDF['year']]
    totalDF['year'] = totalDF['year'].astype(float)

    print("combining duplicated columns with superscript")
    #combining duplicated columns with superscript:
    for value_column in [c for c in totalDF.columns if c != "country"]:
        totalDF[value_column] = pd.to_numeric(totalDF[value_column], errors='coerce')
    # Remove every superscript marker and the surrounding whitespace in one
    # pass, so a name carrying several markers is cleaned completely.
    superscript = '²³¹⁴⁶⁵'
    without_superscripts = str.maketrans('', '', superscript)
    totalDF = totalDF.rename(lambda label: label.translate(without_superscripts).strip(), axis = 1)
    # Columns that now share a name are summed into one; min_count=1 keeps a
    # cell missing when all of its inputs are missing.
    # NOTE(review): groupby(..., axis=1) is deprecated in recent pandas
    # releases -- revisit when upgrading pandas.
    totalDF = totalDF.groupby(level=0, axis=1).sum(min_count = 1)


    print("cleaning column names\n\n")
    #cleaning column names
    # The order of the branches matters: the more specific names are tested
    # before the more general ones they contain.
    for col in totalDF.columns:
        if 'European Collaborative Defence Equipment Procurement' in col or 'European Collaborative Defence Procurement' in col:
            totalDF = totalDF.rename({col: 'European Collaborative Defence Equipment Procurement Expenditure'}, axis = 1)
        elif 'European Collaborative Defence R&T' in col:
            #print('Europe R&T1: ' + col)
            if col != 'European Collaborative Defence R&T Expenditure' or 'European Collaborative Defence R&T Expenditure' not in totalDF.columns:
                #print('Europe R&T: ' + col)
                totalDF = totalDF.rename({col: 'European Collaborative Defence R&T Expenditure'}, axis = 1)
        elif 'Collaborative Defence Equipment Procurement' in col or 'Collaborative Defence Procurement Expenditure' in col:
            totalDF = totalDF.rename({col: 'Collaborative Defence Equipment Procurement Expenditure'}, axis = 1)
        elif 'Collaborative Defence R&T' in col:
            if col != 'Collaborative Defence R&T' or 'Collaborative Defence R&T' not in totalDF.columns:
                totalDF = totalDF.rename({col: 'Collaborative Defence R&T Expenditure'}, axis = 1)
        elif ('Defence Research and Technology (R&T' in col or 'Defence R&T Expenditure' in col) and 'Defence Research and Technology (R&T) Expenditure' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Defence Research and Technology (R&T) Expenditure'}, axis = 1)
        elif ('Defence Research and Development (R&D' in col  or 'Defence R&D Expenditure' in col)and 'Defence Research and Development (R&D) Expenditure' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Defence Research and Development (R&D) Expenditure'}, axis=1)
        elif 'Defence Equipment Procurement' in col and 'Defence Equipment Procurement Expenditure' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Defence Equipment Procurement Expenditure'}, axis=1)
        elif 'Defence Expenditure as % of GDP' in col:
            totalDF = totalDF.rename({col: 'Total Defence Expenditure as % of GDP'}, axis=1)
        elif 'Defence Expenditure as % of Government Expenditure' in col or 'Defence Expenditure as % of Government Spending' in col:
            totalDF = totalDF.rename({col: 'Total Defence Expenditure as % of Government Expenditure'}, axis=1)
        elif 'Defence Expenditure per capita' in col and 'Total Defence Expenditure per capita' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Total Defence Expenditure per capita'}, axis=1)
        elif 'Defence Investment' in col and 'Defence Investment' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Defence Investment'}, axis=1)
        elif 'Total Defence Expenditure' in col and 'Total Defence Expenditure' not in totalDF.columns:
            totalDF = totalDF.rename({col: 'Total Defence Expenditure'}, axis=1)


    #Adding full year df to cleaned_files list
    cleaned_files.append(totalDF)

#combining files
# The first (most recent) frame is taken whole; each later frame only
# contributes the rows whose (country, year) pair is not present yet.
comb_df = pd.DataFrame()
for file_number, df in enumerate(cleaned_files):
    if file_number == 0:
        comb_df = df
        continue
    for index, row in df.iterrows():
        if ((comb_df['country'] == row['country']) & (comb_df['year'] == row['year'])).any():
            continue
        try:
            row_df = row.to_frame().T
            comb_df = pd.concat([comb_df,row_df], ignore_index = True)
        except Exception as err:
            # Report the duplicated column names of the offending frame and
            # re-raise with the original error attached, instead of leaving
            # the interpreter through quit().
            duplicated_columns = list(df.columns[df.columns.duplicated()])
            raise RuntimeError(
                f"could not append EDA rows; duplicated columns: {duplicated_columns}"
            ) from err

print(f"Final Value Columns:\n {comb_df.columns}")

# Order rows by year (newest first) and then by country, using a temporary
# negated-year column as the sort key, and renumber the rows from zero.
comb_df = (
    comb_df.assign(year1 = -comb_df['year'])
    .sort_values(by = ['year1','country'])
    .drop(['year1'], axis = 1)
    .reset_index(drop = True)
)

# Write the csv and keep the frame for the final combining section.
comb_df.to_csv(superFile)
eda_data = comb_df
end



************Combining Both into LTC Becker-Friendly Format***********

python:
# The name of the final file (combined_filename) is set in the file-name
# block at the top of this do file.

# Note: about 20 variables appear only in the EDA data releases from early
# years. They are preserved in the EDA-only csv file but are dropped here
# for the sake of clarity.

#renaming/cleaning eda data
eda_data = eda_data.rename({
            'Total Defence Expenditure' : 'eda_milex',
            'Total Defence Expenditure as % of GDP' : 'eda_milburden' ,
            'Total Defence Expenditure as % of Government Expenditure':'eda_milshare_budget',
            'Total Defence Expenditure per capita':'eda_milex_cap',
            'Defence Investment':'eda_equipment_euros',
            'Defence Equipment Procurement Expenditure':'eda_procurement_euros',
            'Defence Research and Development (R&D) Expenditure':'eda_rnd',
            'Defence Research and Technology (R&T) Expenditure':'eda_rnt',
            'Collaborative Defence Equipment Procurement Expenditure': 'eda_collaborativeprocurement',
            'European Collaborative Defence Equipment Procurement Expenditure' : 'eda_collaborativeprocurement_eu',
            'Collaborative Defence R&T Expenditure' : 'eda_collaborative_rt',
            'European Collaborative Defence R&T Expenditure' : 'eda_collaborative_rt_eu'
        }, axis=1)

# Columns kept in the combined file: every renamed variable plus the merge
# keys. 'eda_procurement_euros' is included so that the variable renamed
# above is no longer dropped by the filter below.
eda_cols = ['eda_milex','eda_milburden','eda_milshare_budget','eda_milex_cap',
'eda_equipment_euros','eda_procurement_euros','eda_rnd','eda_rnt','eda_collaborativeprocurement',
'eda_collaborativeprocurement_eu','eda_collaborative_rt','eda_collaborative_rt_eu','country','year']

eda_data = eda_data.drop([col for col in eda_data.columns if col not in eda_cols], axis = 1)
		




# Short variable names for the NATO columns used in the combined file.
nato_short_names = {
    'Defence_expenditure_millions_USD': 'milex_usd_current',
    'Defence_expenditure_millions_USD_2015_prices': 'milex_usd_2015',
    'Defence_expenditure_millions_USD_2010_prices': 'milex_usd_2010',
    'Defence_expenditure_Share_of_real_GDP_(%)': 'milburden',
    'Defence_expenditure_in_millions_national_currency': 'milex_local',
    'Defence_expenditure_millions_national_currency_2015_prices': 'milex_local_2015',
    'Defence_expenditure_Annual_real_change_(%)': 'milburden_change',
    'GDP_per_capita_(thousand_US_dollars)': 'GDP_per_capita',
    'Defence_expenditure_millions_national_currency_2010_prices': 'milex_local_2010',
    'Real_GDP_Billion_US_dollars_(2015_prices_and_exchange_rates)': 'GDP_billion_usd_2015',
    'Defence_expenditure_per_capita_(US_dollars)': 'milex_per_capita_usd',
    'Real_GDP_Billion_US_dollars_(2010_prices_and_exchange_rates)': 'GDP_billion_usd_2010',
}
nato_data = nato_data.rename(columns=nato_short_names)

# Replace two NATO country spellings before merging on the country name.
for nato_spelling, merged_spelling in (
    ('Slovak Republic', 'Slovakia'),
    ('North Macedonia', 'Macedonia'),
):
    nato_data.loc[nato_data['country'] == nato_spelling, 'country'] = merged_spelling

# Outer join: keep every (country, year) pair found in either source.
comb_df = pd.merge(nato_data, eda_data, how = "outer", on = ['country','year'])

# Every column except the country name is converted to a numeric dtype; the
# column name is printed first, so a failing conversion is easy to locate.
for col in comb_df.columns:
    if col == "country":
        continue
    print(col)
    comb_df[col] = pd.to_numeric(comb_df[col])

# Stata variable labels for the NATO-sourced variables.
nato_labels = {
    'milex_usd_current': 'Defence_expenditure_millions_USD',
    'milex_usd_2015': 'Defence expenditure millions USD 2015 prices',
    'milex_usd_2010': 'Defence expenditure millions USD 2010 prices',
    'milburden': 'Defence expenditure Share of real GDP(%)',
    'milex_local': 'Defence expenditure in millions national currency',
    'milex_local_2015': 'Defence expenditure in millions national currency 2015 prices',
    'milburden_change': 'Defence expenditure annual real change(% gdp)',
    'GDP_per_capita': 'thousands usd',
    'milex_local_2010': 'Defence expenditure millions national currency 2010 prices',
    'GDP_billion_usd_2015': 'Real GDP Billion US dollars (2015_prices_and_exchange_rates)',
    'milex_per_capita_usd': 'Defence expenditure per capita (US dollars)',
    'GDP_billion_usd_2010': 'Real GDP Billion US dollars (2010_prices_and_exchange_rates)',
}

# Stata variable labels for the EDA-sourced variables.
eda_labels = {
    'eda_milex': 'Total Defence Expenditure',
    'eda_milburden': 'Total Defence Expenditure as % of GDP',
    'eda_milshare_budget': 'Total Defence Expenditure as % of Government Expenditure',
    'eda_milex_cap': 'Total Defence Expenditure per capita',
    'eda_equipment_euros': 'Defence Investment',
    'eda_procurement_euros': 'Defence Equipment Procurement Expenditure',
    'eda_rnd': 'Defence Research and Development (R&D) Expenditure',
    'eda_rnt': 'Defence Research and Technology (R&T) Expenditure',
    'eda_collaborativeprocurement': 'Collaborative Defence Equipment Procurement Expenditure',
    'eda_collaborativeprocurement_eu': 'European Collaborative Defence Equipment Procurement Expenditure',
    'eda_collaborative_rt': 'Collaborative Defence R&T Expenditure',
    'eda_collaborative_rt_eu': 'European Collaborative Defence R&T Expenditure',
}

# NATO labels first, then EDA labels, in one lookup table.
var_labels = {**nato_labels, **eda_labels}

# Write the combined data as a Stata file with the labels attached.
comb_df.to_stata(combined_filename, version = 118, variable_labels = var_labels)


end